
#include <string.h>
#include <stdlib.h>
#include <regex.h>

#include "config_db.h"
#include "global_var.h"

/*
Reverse-path   = Path / "<>"
Forward-path   = Path
Path           = "<" [ A-d-l ":" ] Mailbox ">"
A-d-l          = At-domain *( "," At-domain )
				; Note that this form, the so-called "source
				; route", MUST BE accepted, SHOULD NOT be
				; generated, and SHOULD be ignored.
At-domain      = "@" Domain
Domain         = sub-domain *("." sub-domain)
sub-domain     = Let-dig [Ldh-str]
Let-dig        = ALPHA / DIGIT
Ldh-str        = *( ALPHA / DIGIT / "-" ) Let-dig
Mailbox        = Local-part "@" ( Domain / address-literal )
address-literal  = "[" ( IPv4-address-literal /
				IPv6-address-literal /
				General-address-literal ) "]"
				; See Section 4.1.3
Local-part     = Dot-string / Quoted-string
				; MAY be case-sensitive
Dot-string     = Atom *("."  Atom)
Atom           = 1*atext
atext          = ALPHA / DIGIT / ; Any character except controls,
				"!" / "#" /     ;  SP, and specials.
				"$" / "%" /     ;  Used for atoms
				"&" / "'" /
				"*" / "+" /
				"-" / "/" /
				"=" / "?" /
				"^" / "_" /
				"`" / "{" /
				"|" / "}" /
				"~"
Quoted-string  = DQUOTE *QcontentSMTP DQUOTE
QcontentSMTP   = qtextSMTP / quoted-pairSMTP
quoted-pairSMTP  = %d92 %d32-126
					\    SPACE-~
				; i.e., backslash followed by any ASCII
				; graphic (including itself) or SPace
qtextSMTP      = %d32-33 / %d35-91 / %d93-126
					no \ and "
				; i.e., within a quoted string, any
				; ASCII graphic or space is permitted
				; without backslash-quoting except
				; double-quote and the backslash itself.
String         = Atom / Quoted-string
*/

/// @brief Check that a string is a syntactically valid Local-part (Dot-string or Quoted-string).
/// @param str candidate local part; need not be NUL-terminated
/// @param str_len number of bytes of str to examine
/// @return =0 valid, otherwise a non-zero regex.h code (REG_NOMATCH when the text is not a
///         local part, REG_ESPACE when the working copy cannot be allocated)
int check_local_part_format(unsigned char *str, int str_len)
{
	if (str == NULL || str_len < 0)
		return REG_NOMATCH;

	// regexec() needs a NUL-terminated string, so match against a private copy.
	char *strc = malloc((size_t)str_len + 1);
	if (strc == NULL)
		return REG_ESPACE;
	strncpy(strc, (const char *)str, (size_t)str_len);
	strc[str_len] = '\0';

	// First alternative: Dot-string (atoms separated by single dots).
	// Second alternative: Quoted-string made of qtextSMTP and backslash quoted pairs.
	// NOTE: inside a POSIX bracket expression a backslash is an ordinary character, so "[\[\]]"
	// cannot work; ']' is listed first in the brackets instead, where it is taken literally.
	regex_t reg;
	int rc = regcomp(&reg, "^[0-9A-Za-z!#$%&'*+/=?^_`{|}~-]+(\\.[0-9A-Za-z!#$%&'*+/=?^_`{|}~-]+)*$|^\"(\\\\[][0-9A-Za-z !\"#$%&'()*+,./:;<=>?@\\\\^_`{|}~-]|[][0-9A-Za-z !#$%&'()*+,./:;<=>?@^_`{|}~-])*\"$", REG_EXTENDED);
	if (rc == 0)
	{
		// Only execute and release a pattern that actually compiled.
		rc = regexec(&reg, strc, 0, NULL, 0);
		regfree(&reg);
	}

	free(strc);
	return rc;
}

/// @brief Check that a string is a syntactically valid Domain (dot-separated sub-domains).
///
/// Each sub-domain must start and end with a letter or digit and may contain '-' in between,
/// as required by the sub-domain / Ldh-str rules.
/// @param str candidate domain; need not be NUL-terminated
/// @param str_len number of bytes of str to examine
/// @return =0 valid, otherwise a non-zero regex.h code (REG_NOMATCH when the text is not a
///         domain, REG_ESPACE when the working copy cannot be allocated)
int check_domain_format(unsigned char *str, int str_len)
{
	if (str == NULL || str_len < 0)
		return REG_NOMATCH;

	// regexec() needs a NUL-terminated string, so match against a private copy.
	char *strc = malloc((size_t)str_len + 1);
	if (strc == NULL)
		return REG_ESPACE;
	strncpy(strc, (const char *)str, (size_t)str_len);
	strc[str_len] = '\0';

	// '-' is placed last in the bracket expression so it is literal; a backslash would be
	// literal too, so none is used. The ranges are spelled A-Z and a-z separately because
	// "A-z" would also admit the punctuation that sits between 'Z' and 'a'.
	regex_t reg;
	int rc = regcomp(&reg, "^[0-9A-Za-z]([0-9A-Za-z-]*[0-9A-Za-z])?(\\.[0-9A-Za-z]([0-9A-Za-z-]*[0-9A-Za-z])?)*$", REG_EXTENDED);
	if (rc == 0)
	{
		// Only execute and release a pattern that actually compiled.
		rc = regexec(&reg, strc, 0, NULL, 0);
		regfree(&reg);
	}

	free(strc);
	return rc;
}

/// @brief Check that a string is a bracketed address literal: "[" IPv4 "]" or "[IPv6:" IPv6 "]".
///
/// The pattern accepts dotted-quad IPv4 with each octet in 0-255, and IPv6 text (optionally
/// with an embedded IPv4 tail and a "%..." suffix) after the "IPv6:" tag. No other tagged
/// (general) address literal form is matched.
/// @param str candidate literal including the enclosing '[' and ']'; need not be NUL-terminated
/// @param str_len number of bytes of str to examine
/// @return =0 valid, otherwise a non-zero regex.h code (REG_NOMATCH when the text is not an
///         address literal, REG_ESPACE when the working copy cannot be allocated)
int check_address_literal_format(unsigned char *str, int str_len)
{
	// "[" address "]"
	// IPv4 part, in PCRE spelling:
	// (((\d{1,2})|(1\d{2})|(2[0-4]\d)|(25[0-5]))\.){3}((\d{1,2})|(1\d{2})|(2[0-4]\d)|(25[0-5]))
	// IPv6 part: ipv6 regex from Stephen Ryan, prefixed with "IPv6:" and rewritten with
	// [0-9] in place of \d because POSIX extended regular expressions have no \d.
	if (str == NULL || str_len < 0)
		return REG_NOMATCH;

	// regexec() needs a NUL-terminated string, so match against a private copy.
	char *strc = malloc((size_t)str_len + 1);
	if (strc == NULL)
		return REG_ESPACE;
	strncpy(strc, (const char *)str, (size_t)str_len);
	strc[str_len] = '\0';

	regex_t reg;
	int rc = regcomp(&reg, "^\\[(((([0-9]{1,2})|(1[0-9]{2})|(2[0-4][0-9])|(25[0-5]))\\.){3}(([0-9]{1,2})|(1[0-9]{2})|(2[0-4][0-9])|(25[0-5]))|IPv6:((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])(\\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])(\\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])(\\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])(\\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])(\\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])(\\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])(\\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])){3}))|:)))(%.+)?)\\]$", REG_EXTENDED);
	if (rc == 0)
	{
		// Only execute and release a pattern that actually compiled.
		rc = regexec(&reg, strc, 0, NULL, 0);
		regfree(&reg);
	}

	free(strc);
	return rc;
}

// reserved
/// @brief Coarse syntax check of a complete reverse-path: "<>" or
///        "<" [ source-route ":" ] local "@" ( domain / "[" literal "]" ) ">".
///
/// Only the overall shape is matched (the parts are mostly ".*"); the individual local part
/// and domain rules are not enforced here. The whole string must be the path: nothing may
/// follow the closing '>'.
/// @param str candidate path including '<' and '>'; need not be NUL-terminated
/// @param str_len number of bytes of str to examine
/// @return =0 shape matches, otherwise a non-zero regex.h code (REG_NOMATCH when it does not,
///         REG_ESPACE when the working copy cannot be allocated)
int check_reverse_path_format(unsigned char *str, int str_len)
{
	if (str == NULL || str_len < 0)
		return REG_NOMATCH;

	// regexec() needs a NUL-terminated string, so match against a private copy.
	char *strc = malloc((size_t)str_len + 1);
	if (strc == NULL)
		return REG_ESPACE;
	strncpy(strc, (const char *)str, (size_t)str_len);
	strc[str_len] = '\0';

	regex_t reg;
	int rc = regcomp(&reg, "^<>$|^<(@[0-9A-Za-z].*(,@[0-9A-Za-z].*)*:)?(([0-9A-Za-z!#$%&'*+/=?^_`{|}~-].*|\".*\")@([0-9A-Za-z].*|\\[.*\\]))>$", REG_EXTENDED);
	if (rc == 0)
	{
		// Only execute and release a pattern that actually compiled.
		rc = regexec(&reg, strc, 0, NULL, 0);
		regfree(&reg);
	}

	free(strc);
	return rc;
}

/// @brief Validate a reverse-path and apply the configured deny rules.
///
/// Anything starting with "<>" is accepted immediately. Otherwise the text must look like
/// "<" [ source-route ":" ] local "@" ( domain / address-literal ) ">"; every source-route
/// domain, the mailbox domain and the mailbox itself are looked up with get_domain_ctrl()
/// and the path is rejected when a lookup returns DOMAIN_DENY. Text after the last '>' is
/// not examined.
/// @param str include '<', '>'; need not be NUL-terminated
/// @param str_len number of bytes of str to examine; 512 bytes or more is rejected
/// @return =0 ok =-1 error
int check_reverse_path(unsigned char *str, int str_len)
{
	if (str == NULL || str_len < 0)
		return -1;

	// The null reverse-path is always acceptable.
	if (str_len >= 2 && str[0] == '<' && str[1] == '>')
		return 0;

	unsigned char strc[512] = {0};
	// Refuse input that would not fit together with its terminator instead of overrunning strc.
	if ((size_t)str_len >= sizeof(strc))
		return -1;
	strncpy((char *)strc, (const char *)str, (size_t)str_len);

	regex_t reg;
	regmatch_t match[10];
	// Only execute and release a pattern that actually compiled.
	if (regcomp(&reg, "^<>|^<(@[0-9A-Za-z].*(,@[0-9A-Za-z].*)*:)?(([0-9A-Za-z!#$%&'*+/=?^_`{|}~-].*|\".*\")@([0-9A-Za-z].*|\\[.*\\]))>", REG_EXTENDED) != 0)
		return -1;
	int rc = regexec(&reg, (const char *)strc, 10, match, 0);
	regfree(&reg);

	if (rc != 0)
		return -1;

	/*
	0 all
	1 (@...(,@...)*:)
	2 (,@...)*
	3 l@d
	4 l
	5 d
	*/
	if (match[1].rm_so != -1 && match[1].rm_eo != -1) // (@[0-9A-Za-z].*(,@[0-9A-Za-z].*)*:)?
	{
		// comma_p is the index of the separator before the current hop (or of the '<' for the
		// first hop), so the hop's '@' sits at comma_p + 1 and its domain starts at comma_p + 2.
		int comma_p = (int)match[1].rm_so - 1;
		for (int i = (int)match[1].rm_so; i < (int)match[1].rm_eo; i++)
		{
			if (strc[i] != ',' && strc[i] != ':')
				continue;

			int hop_len = i - (comma_p + 2);
			unsigned char hop[256] = {0};
			// The loose ".*" in the pattern lets through hops without '@', empty hops
			// (two separators in a row give a negative length) and over-long hops.
			if (strc[comma_p + 1] != '@' || hop_len <= 0 || (size_t)hop_len >= sizeof(hop))
				return -1;

			if (check_domain_format(strc + comma_p + 2, hop_len) != 0)
				return -1;

			memcpy(hop, strc + comma_p + 2, (size_t)hop_len);
			if (get_domain_ctrl(g_config_db, NULL, hop) == DOMAIN_DENY)
				return -1;
			comma_p = i;
		}
	}

	// Mailbox domain: must fit in d and be either a domain name or an address literal.
	int domain_len = (int)(match[5].rm_eo - match[5].rm_so);
	unsigned char d[256] = {0};
	if (domain_len <= 0 || (size_t)domain_len >= sizeof(d))
		return -1;
	if (check_domain_format(strc + match[5].rm_so, domain_len) != 0 && check_address_literal_format(strc + match[5].rm_so, domain_len) != 0)
		return -1;
	memcpy(d, strc + match[5].rm_so, (size_t)domain_len);
	if (get_domain_ctrl(g_config_db, NULL, d) == DOMAIN_DENY)
		return -1;

	// Local part: must fit in l and be a Dot-string or Quoted-string.
	int local_len = (int)(match[4].rm_eo - match[4].rm_so);
	unsigned char l[128] = {0};
	if (local_len <= 0 || (size_t)local_len >= sizeof(l))
		return -1;
	if (check_local_part_format(strc + match[4].rm_so, local_len) != 0)
		return -1;
	memcpy(l, strc + match[4].rm_so, (size_t)local_len);
	if (get_domain_ctrl(g_config_db, l, d) == DOMAIN_DENY)
		return -1;

	return 0;
}

/// @brief Validate a forward-path and apply the configured deny rules.
///
/// Anything starting with exactly "<Postmaster>" is accepted immediately. Otherwise the text
/// must look like "<" [ source-route ":" ] local "@" ( domain / address-literal ) ">"; every
/// source-route domain, the mailbox domain and the mailbox itself are looked up with
/// get_domain_ctrl() and the path is rejected when a lookup returns DOMAIN_DENY. Text after
/// the last '>' is not examined.
/// @param str include '<', '>'; need not be NUL-terminated
/// @param str_len number of bytes of str to examine; 512 bytes or more is rejected
/// @return =0 ok =-1 error
int check_forward_path(unsigned char *str, int str_len)
{
	if (str == NULL || str_len < 0)
		return -1;

	// NOTE(review): this comparison is case-sensitive; RFC 5321 describes "postmaster" as a
	// case-insensitive local name - confirm whether other spellings should be accepted here.
	if (str_len >= 12 && strncmp((const char *)str, "<Postmaster>", 12) == 0)
		return 0;

	unsigned char strc[512] = {0};
	// Refuse input that would not fit together with its terminator instead of overrunning strc.
	if ((size_t)str_len >= sizeof(strc))
		return -1;
	strncpy((char *)strc, (const char *)str, (size_t)str_len);

	regex_t reg;
	regmatch_t match[10];
	// Only execute and release a pattern that actually compiled.
	if (regcomp(&reg, "^<(@[0-9A-Za-z].*(,@[0-9A-Za-z].*)*:)?(([0-9A-Za-z!#$%&'*+/=?^_`{|}~-].*|\".*\")@([0-9A-Za-z].*|\\[.*\\]))>", REG_EXTENDED) != 0)
		return -1;
	int rc = regexec(&reg, (const char *)strc, 10, match, 0);
	regfree(&reg);

	if (rc != 0)
		return -1;

	// Groups: 1 = source route incl. ':', 2 = repeated ",@...", 3 = local@domain, 4 = local, 5 = domain.
	if (match[1].rm_so != -1 && match[1].rm_eo != -1) // (@[0-9A-Za-z].*(,@[0-9A-Za-z].*)*:)?
	{
		// comma_p is the index of the separator before the current hop (or of the '<' for the
		// first hop), so the hop's '@' sits at comma_p + 1 and its domain starts at comma_p + 2.
		int comma_p = (int)match[1].rm_so - 1;
		for (int i = (int)match[1].rm_so; i < (int)match[1].rm_eo; i++)
		{
			if (strc[i] != ',' && strc[i] != ':')
				continue;

			int hop_len = i - (comma_p + 2);
			unsigned char hop[256] = {0};
			// The loose ".*" in the pattern lets through hops without '@', empty hops
			// (two separators in a row give a negative length) and over-long hops.
			if (strc[comma_p + 1] != '@' || hop_len <= 0 || (size_t)hop_len >= sizeof(hop))
				return -1;

			if (check_domain_format(strc + comma_p + 2, hop_len) != 0)
				return -1;

			memcpy(hop, strc + comma_p + 2, (size_t)hop_len);
			if (get_domain_ctrl(g_config_db, NULL, hop) == DOMAIN_DENY)
				return -1;
			comma_p = i;
		}
	}

	// Mailbox domain: must fit in d and be either a domain name or an address literal.
	int domain_len = (int)(match[5].rm_eo - match[5].rm_so);
	unsigned char d[256] = {0};
	if (domain_len <= 0 || (size_t)domain_len >= sizeof(d))
		return -1;
	if (check_domain_format(strc + match[5].rm_so, domain_len) != 0 && check_address_literal_format(strc + match[5].rm_so, domain_len) != 0)
		return -1;
	memcpy(d, strc + match[5].rm_so, (size_t)domain_len);
	if (get_domain_ctrl(g_config_db, NULL, d) == DOMAIN_DENY) // TODO: in strict mode the forward path should be checked against a whitelist
		return -1;

	// Local part: must fit in l and be a Dot-string or Quoted-string.
	int local_len = (int)(match[4].rm_eo - match[4].rm_so);
	unsigned char l[128] = {0};
	if (local_len <= 0 || (size_t)local_len >= sizeof(l))
		return -1;
	if (check_local_part_format(strc + match[4].rm_so, local_len) != 0)
		return -1;
	memcpy(l, strc + match[4].rm_so, (size_t)local_len);
	if (get_domain_ctrl(g_config_db, l, d) == DOMAIN_DENY) // TODO: in strict mode the forward path should be checked against a whitelist
		return -1;

	return 0;
}

/**
 * @brief Get the SIZE from mail from cmd
 *
 * Looks for the first " SIZE=<digits>" parameter (at least one leading space, keyword matched
 * case-insensitively) and converts the digits with strtoul(). A digit run too large for
 * unsigned long yields strtoul()'s saturated result rather than 0.
 *
 * @param str command text; need not be NUL-terminated
 * @param str_len number of bytes of str to examine; 512 bytes or more is treated as unknown
 * @return size_t declared size, =0 unknown size
 */
size_t get_declare_size(unsigned char *str, int str_len)
{
	unsigned char strc[512] = {0};
	// Refuse input that would not fit together with its terminator instead of overrunning strc.
	if (str == NULL || str_len < 0 || (size_t)str_len >= sizeof(strc))
		return 0;
	strncpy((char *)strc, (const char *)str, (size_t)str_len);

	regex_t reg;
	regmatch_t match[10];
	// Only execute and release a pattern that actually compiled.
	if (regcomp(&reg, " +SIZE=([0-9]+) *", REG_EXTENDED | REG_ICASE) != 0)
		return 0;
	int rc = regexec(&reg, (const char *)strc, 10, match, 0);
	regfree(&reg);

	if (rc != 0 || match[1].rm_so == -1 || match[1].rm_eo == -1)
		return 0;

	// Cut the copy right after the digit run so strtoul() parses exactly the captured digits
	// in place; no second fixed-size buffer is needed, whatever the number of digits.
	strc[match[1].rm_eo] = '\0';
	char *endptr;
	size_t r = strtoul((const char *)strc + match[1].rm_so, &endptr, 10);
	if (*endptr == '\0')
		return r;

	return 0;
}
